"""
Author: YSR

Created: 2024-12-21

Description: 提取网页中的题目 用于复习
"""
from lxml import etree


class Document:
    """Extract quiz topics (question text, options, answer) from an HTML page.

    Topics are located with fixed XPath selectors that compare the full
    ``class`` attribute, so only markup using exactly those class strings is
    recognised.
    """

    # A heading shorter than this (after clean-up) is treated as unusable and
    # triggers the text-node fallback in ``_topic_name``.
    _MIN_NAME_LENGTH = 5
    # Positions, within all text nodes of a topic, that are concatenated to
    # build the fallback name.
    _FALLBACK_NAME_NODES = (1, 4)

    def __init__(self, html):
        """Store the raw markup to be parsed later by ``extract``.

        Args:
            html: HTML source of the page.
        """
        self.html = html

    @staticmethod
    def _clean(text):
        """Strip *text*, then remove every remaining space and newline."""
        return text.strip().replace(" ", "").replace("\n", "")

    def _topic_name(self, topic):
        """Return the cleaned question text of one topic element.

        The ``h3`` heading is preferred. When its cleaned text is shorter than
        ``_MIN_NAME_LENGTH``, the name is rebuilt from the topic's text nodes
        at ``_FALLBACK_NAME_NODES``. If the topic does not have enough text
        nodes for that, the short heading text is returned instead of raising
        ``IndexError``.
        """
        heading = topic.xpath('.//h3[@class="mark_name colorDeep"]/text()')
        name = self._clean("".join(heading))
        if len(name) >= self._MIN_NAME_LENGTH:
            return name

        texts = topic.xpath('.//text()')
        if len(texts) <= max(self._FALLBACK_NAME_NODES):
            # Not enough text nodes for the fallback: keep what we have.
            return name
        return self._clean("".join(texts[i] for i in self._FALLBACK_NAME_NODES))

    def extract(self) -> list:
        """Parse the stored HTML and return one dict per topic.

        Returns:
            A list of ``{"name": str, "options": list[str], "answer": str}``
            dicts in document order. The list is empty when the markup is
            empty, whitespace-only, or contains no topic container.
        """
        html = self.html
        # Empty markup has no element tree to query, so skip the parser.
        if not html or not html.strip():
            return []

        tree = etree.HTML(html)
        if tree is None:
            return []

        results = []
        for topic in tree.xpath('//div[@class="aiAreaContent"]'):
            options = topic.xpath('.//*[@class="mark_letter colorDeep"]/li/text()')
            answer = topic.xpath('.//*[@class="colorGreen marginRight40 fl"]/text()')
            results.append({
                "name": self._topic_name(topic),
                "options": [self._clean(option) for option in options],
                "answer": self._clean("".join(answer)),
            })
        return results
